# JSTOR Data Analysis

# The workspace is intentionally NOT cleared with rm(list = ls()): that call
# deletes objects belonging to whoever runs or sources this script and does not
# give a clean session anyway (loaded packages and options persist). Run the
# script in a fresh R session instead.

# print date and time of run
print("Starting date and time")
# Explicit print(): a bare Sys.time() is only shown through top-level
# auto-printing and is silently dropped when the file is run via source().
print(Sys.time())

library(ggplot2)

# Colour model handed to the graphics device when figures are saved.
# Swap the two lines below to produce colour output instead of grayscale.
# print_colormodel <- "srgb"
print_colormodel <- "grey"

# Folder holding the input CSV files (replace the placeholder before running).
data_folder <- "INSERT HERE"

# Folder the figures are written to (replace the placeholder before running).
images_directory <- "INSERT HERE"


# import the data
# file.path() supplies the path separator itself, so data_folder works with or
# without a trailing slash (plain concatenation produced a wrong path when the
# slash was missing).
j_all <- read.csv(
  file.path(data_folder, "2017_07_16_jstor_poli_sci_discipline_all.csv"),
  stringsAsFactors = FALSE
)
j_se <- read.csv(
  file.path(data_folder,
            "2017_07_16_jstor_poli_sci_discipline_survey_experiment.csv"),
  stringsAsFactors = FALSE
)

# Give the second column of the survey-experiment table a distinct name.
names(j_se)[2] <- "SURVEY_EXPERIMENT_COUNT"

# Left join: keep every row of j_se and attach the matching row of j_all.
# merge() joins on the column names the two tables share.
jstor <- merge(x = j_se, y = j_all, all.x = TRUE)

# proportion of political science articles that use survey experiments
# NOTE(review): columns are addressed by position; this assumes that after the
# merge column 2 is SURVEY_EXPERIMENT_COUNT and column 3 is the all-articles
# count from j_all -- confirm against the CSV headers.
jstor$prop <- jstor[[2]] / jstor[[3]]

# counts
# Scatter plot of SURVEY_EXPERIMENT_COUNT against YEAR with a smoothed trend
# line (no confidence band), restricted to rows whose YEAR lies strictly
# between 1970 and 2013.
ggplot(
  data = jstor[with(jstor, YEAR > 1970 & YEAR < 2013), ],
  mapping = aes(x = YEAR, y = SURVEY_EXPERIMENT_COUNT)
) +
  theme_bw() +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(x = "Year", y = "Number of Articles")

# percentages
# Scatter plot of prop against YEAR with a smoothed trend line (no confidence
# band), restricted to rows whose YEAR lies strictly between 1970 and 2013.
# The y axis is labelled as a percentage and fixed to the range 0 to 0.01.
prop_plot <- ggplot(
  data = jstor[jstor$YEAR > 1970 & jstor$YEAR < 2013, ],
  mapping = aes(x = YEAR, y = prop)
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  theme_bw() +
  scale_x_continuous(breaks = seq(from = 1975, to = 2015, by = 5),
                     name = "Year") +
  scale_y_continuous(labels = scales::percent, limits = c(0, 0.01),
                     name = "Percent of Articles")

# Draw the figure explicitly so it is also shown when the script is run via
# source(), where top-level auto-printing does not happen.
print(prop_plot)

# Pass the plot object explicitly rather than relying on last_plot(), and let
# file.path() supply the separator so images_directory works with or without a
# trailing slash.
ggsave(file.path(images_directory, "Supplementary_Appendix_Figure_1.pdf"),
       plot = prop_plot, width = 6, height = 2.5, dpi = 300,
       colormodel = print_colormodel)


